
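"""Scraper for www.bjhfcw.com.

req()          fetch a page and parse it with BeautifulSoup
dl()           save one (possibly multi-page) article to a local text file
all_info()     download every article in a category chosen by the user
latest_info()  download the latest articles linked from the home page
"""
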
import requests
from bs4 import BeautifulSoup

def req(url):
    """Fetch a page and return it parsed as a BeautifulSoup object."""
    # Send a browser-like User-Agent so the site does not block the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36 Edg/94.0.992.50'
    }
    r = requests.get(url, headers=headers, timeout=10)
    # Force UTF-8 so the Chinese text decodes correctly.
    r.encoding = 'utf-8'
    return BeautifulSoup(r.text, 'lxml')

def dl(article_url):
    """Download one (possibly multi-page) article into d:/<title>.txt."""
    soup = req(article_url)
    article = soup.find('div', attrs={'class': 'single-content repcontent'})
    # The first <p> of the article body holds the title; it doubles as the file name.
    title = article.p.string
    # The "end" pagination link looks like ..._<last>.html; <last> is the page count.
    # Single-page articles have no such link.
    end_li = soup.find('li', attrs={'class': 'end'})
    pages = int(end_li.a.attrs['href'].split('.')[0].split('_')[1]) if end_li else 1
    # Page 1 is the article URL itself; page n is <article>_n.html.
    pages_list = [article_url]
    for page in range(2, pages + 1):
        pages_list.append(article_url.split('.html')[0] + '_' + str(page) + '.html')
    with open(f'd:/{title}.txt', 'a+', encoding='utf-8') as f:
        for page_url in pages_list:
            soup = req(page_url)
            node_p = soup.find('div', attrs={'class': 'single-content repcontent'}).find_all('p')
            for passage in node_p:
                content = passage.string
                # Paragraphs containing nested tags have .string == None; skip them.
                if content:
                    f.write(content + '\n')
    print(f'{title} downloaded')

def all_info():
    """Ask for a category, then download every article on its first 10 index pages."""
    soup = req('https://www.bjhfcw.com/i34/')
    # Build a map of category name -> category URL from the "catmore" links.
    catmore = {}
    for span in soup.find_all('span', attrs={'class': 'catmore'}):
        name = span.a.attrs['title'].split()[1]
        catmore[name] = span.a.attrs['href']
    # Available categories: 情爱世界 / 情感短篇 / 伦理短文 / 两性私语 / H文章节
    choice = input('Enter a category:\n')
    category_url = catmore[choice]
    # Index page 1 is the category URL itself; pages 2-10 are index_<n>.html.
    index_urls = [category_url]
    for index in range(2, 11):
        index_urls.append('{}index_{}.html'.format(category_url, index))
    for index_url in index_urls:
        node_h3 = req(index_url).find_all('h3', attrs={'class': 'hw'})
        for h3 in node_h3:
            dl(h3.a.attrs['href'])

def latest_info():
    """Download every article linked from the 'line-one' block on the home page."""
    soup = req('https://www.bjhfcw.com/')
    node_li = soup.find('div', attrs={'class': 'line-one'}).find_all('li')
    for li in node_li:
        # These links are relative, so prepend the site root.
        article_url = 'https://www.bjhfcw.com' + li.a.attrs['href']
        dl(article_url)

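
if __name__ == '__main__':
    # Example entry point (usage sketch): run one of the two crawl modes.
    latest_info()     # grab the latest articles from the home page
    # all_info()      # or crawl a whole category interactively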